/* Appendix Table 2 */



#delimit ;

* start from an empty session and switch off the more pause;
clear all;
set more off;

* file stem of the Excel workbook used by the export excel commands in this file;
local outfile "GWgapr10_descIDI";

* write a blank line and then the current date and time to the log;
display _n "$S_DATE $S_TIME";








********************************************************************************;
* (1) Number of job changes for men and women by age group;
********************************************************************************;
* Output: one row per year x age_cat x female holding two counts, written to;
* worksheet 1 jobch b of the workbook named by the outfile local;

local varlist "snz_uid hp_pent age_cat female year Q_rest";
use `varlist' using GWgap_pr_IDI_v4, clear;

* a person is considered to have a job change if their hp_pent differs from the previous year;
* job_ch is set only when the previous row is the same snz_uid in the immediately;
* preceding year and hp_pent is non-empty on both rows:;
*   0 = same hp_pent as the previous year, 1 = different hp_pent, missing otherwise;

sort snz_uid year;

generate job_ch = (hp_pent != hp_pent[_n-1])
	if snz_uid == snz_uid[_n-1] & year == year[_n-1] + 1
	& hp_pent != "" & hp_pent[_n-1] != "";

* keeping restricted person years only;
* this runs after job_ch is built, so the previous-year row is still in memory;
* when the comparison above is made;

keep if Q_rest==1;


* collapsing to the gender-year-age level;
* num_job_ch sums the job_ch flag and num_job counts rows with non-missing job_ch;

collapse (sum) num_job_ch = job_ch (count) num_job = job_ch, by(year age_cat female);
label var num_job_ch "No. of ppl who worked current and previous yrs at diff hp pents (rr3)"; 
label var num_job "No. of ppl who worked current and previous yrs at same or diff hp pents (rr3)";

/* use next two lines to confidentialise; 
grr num_job_ch, base(3) seed(65234);
grr num_job, base(3) seed(642009);
*/
* drop next two lines when using conf code;
* stored as double, not the default float, because these copies are written back;
* over the counts below and a float cannot hold integers above 16,777,216 exactly;
gen double rr3num_job_ch = num_job_ch;
gen double rr3num_job = num_job;

* suppress any count below 6 by setting it to the extended missing value .s;
* the test uses the original count, not the rr3 copy;

foreach var in num_job_ch num_job {;
	replace rr3`var' = .s if `var'<6;
};

replace num_job_ch = rr3num_job_ch;
replace num_job = rr3num_job;

drop rr3*;
describe;

* missing values, including .s, are written to the sheet as S;
export excel using "`outfile'.xlsx", sheet("1 jobch b") sheetreplace firstrow(variables) missing("S"); 






********************************************************************************;
* (2) Changes in average annual earnings for men and women by age group;
********************************************************************************;
* Output: one row per age_cat x female x job_ch with the cell count and the;
* distribution of four year-on-year log earnings changes, written to worksheet;
* 2 earnch b of the workbook named by the outfile local;

local varlist "snz_uid max_gross_earn_yr max_mon_wkd hp_pent age_cat female year Q_rest lnWpm_hp indiv_avfte_hp";
use `varlist' using GWgap_pr_IDI_v4, clear;

* merging on total gross earnings in the year;
* every master row is kept whether or not it finds a match;

merge 1:1 snz_uid year hp_pent using GWgap_IDI_indivregdata, keep(master match) keepusing(tot_gross_earn_yr) nogenerate;

* a person is considered to have a job change if their hp_pent differs from the previous year;
* job_ch is set only when the previous row is the same snz_uid in the immediately;
* preceding year and hp_pent is non-empty on both rows:;
*   0 = same hp_pent as the previous year, 1 = different hp_pent, missing otherwise;

sort snz_uid year;

generate job_ch = (hp_pent != hp_pent[_n-1])
	if snz_uid == snz_uid[_n-1] & year == year[_n-1] + 1
	& hp_pent != "" & hp_pent[_n-1] != "";
	
	
* constructing the log earnings levels;

generate lnmax_gross_earn_yr = ln(max_gross_earn_yr);

generate mon_earn_hp = max_gross_earn_yr/max_mon_wkd;
label var mon_earn_hp "Average monthly earnings from hp pent in months worked for hp pent";

generate lnmon_earn_hp = ln(mon_earn_hp);
label var lnmon_earn_hp "Average monthly earnings from hp pent in months worked for hp pent (ln)";

generate lntot_gross_earn_yr = ln(tot_gross_earn_yr);

generate lnhp_earnfte = ln(exp(lnWpm_hp)/indiv_avfte_hp);
label var lnhp_earnfte "Average monthly earnings/FTEs from hp pent (ln)";

* constructing change in earnings;
* a non-missing job_ch guarantees that row _n-1 is the same person one year earlier;

generate ch_lntot_earn = lntot_gross_earn_yr - lntot_gross_earn_yr[_n-1] if !missing(job_ch);
generate ch_lnhp_earn = lnmax_gross_earn_yr - lnmax_gross_earn_yr[_n-1] if !missing(job_ch);
generate ch_lnhpmon_earn = lnmon_earn_hp - lnmon_earn_hp[_n-1] if !missing(job_ch);
generate chlnhp_earnfte = lnhp_earnfte - lnhp_earnfte[_n-1] if !missing(job_ch);

keep if !missing(job_ch);


* keeping observations in restricted sample only;

keep if Q_rest==1;


* replicating all obs to make an additional age category, total;
* the copies are recoded to age_cat 9 so each person-year is counted once in its;
* own age group and once in the total;

expand 2, generate(is_copy);
replace age_cat = 9 if is_copy==1;
drop is_copy;


* collapsing to the age_cat x female x job_ch level, pooling all remaining years;
* NOTE(review): 2002 is dropped here while the business cycle section of this;
* file drops 2001 instead - confirm which first year is intended;
* NOTE(review): only the cell count is suppressed further down, so percentiles,;
* means and sds are still exported for cells with fewer than 6 observations -;
* confirm this meets the output confidentiality rules;

drop if year==2002;
generate i = 1;

* build the percentile part of the collapse spec, one stat at a time, keeping;
* the output variable order p5 p10 p25 p50 p75 p90 p95;
local pct_spec "";
foreach s in p5 p10 p25 p50 p75 p90 p95 {;
	local pct_spec "`pct_spec' (`s') `s'_tot = ch_lntot_earn `s'_hp = ch_lnhp_earn `s'_hpmon = ch_lnhpmon_earn `s'_fte = chlnhp_earnfte";
};

collapse (sum) num_job = i `pct_spec'
	(mean) m_tot = ch_lntot_earn m_hp = ch_lnhp_earn m_hpmon = ch_lnhpmon_earn m_fte = chlnhp_earnfte
	(sd) sd_tot = ch_lntot_earn sd_hp = ch_lnhp_earn sd_hpmon = ch_lnhpmon_earn sd_fte = chlnhp_earnfte, 
	by(age_cat female job_ch);
label var num_job "No. of ppl in category (rr3)";
label var job_ch "Dummy for worked for a different highest paying pent to previous year";

/* use line below to conf;
grr num_job, base(3) seed(643109);
*/
* drop line below when using conf code;
* stored as double, not the default float, because this copy is written back;
* over num_job below and a float cannot hold integers above 16,777,216 exactly;
gen double rr3num_job = num_job; 

* suppress any count below 6 by setting it to the extended missing value .s;

foreach var in num_job {;
	replace rr3`var' = .s if `var'<6;
};

replace num_job = rr3num_job;

drop rr3*;
describe;

* missing values, including .s, are written to the sheet as S;
export excel using "`outfile'.xlsx", sheet("2 earnch b") sheetreplace firstrow(variables) missing("S"); 






********************************************************************************;
* (3) Changes in average annual earnings for men and women by business cycle stage;
********************************************************************************;
* Output: one row per female x job_ch x bcyc x age_cat with the cell count and;
* the distribution of four year-on-year log earnings changes, written to;
* worksheet 3 earnchb b of the workbook named by the outfile local;

local varlist "snz_uid max_gross_earn_yr max_mon_wkd hp_pent age_cat female year Q_rest lnWpm_hp indiv_avfte_hp";
use `varlist' using GWgap_pr_IDI_v4, clear;

* merging on total gross earnings in the year;
* every master row is kept whether or not it finds a match;

merge 1:1 snz_uid year hp_pent using GWgap_IDI_indivregdata, keep(master match) keepusing(tot_gross_earn_yr) nogenerate;

* a person is considered to have a job change if their hp_pent differs from the previous year;
* job_ch is set only when the previous row is the same snz_uid in the immediately;
* preceding year and hp_pent is non-empty on both rows:;
*   0 = same hp_pent as the previous year, 1 = different hp_pent, missing otherwise;

sort snz_uid year;

generate job_ch = (hp_pent != hp_pent[_n-1])
	if snz_uid == snz_uid[_n-1] & year == year[_n-1] + 1
	& hp_pent != "" & hp_pent[_n-1] != "";
	
	
* constructing the log earnings levels;

generate lnmax_gross_earn_yr = ln(max_gross_earn_yr);

generate mon_earn_hp = max_gross_earn_yr/max_mon_wkd;
label var mon_earn_hp "Average monthly earnings from hp pent in months worked for hp pent";

generate lnmon_earn_hp = ln(mon_earn_hp);
label var lnmon_earn_hp "Average monthly earnings from hp pent in months worked for hp pent (ln)";

generate lntot_gross_earn_yr = ln(tot_gross_earn_yr);

generate lnhp_earnfte = ln(exp(lnWpm_hp)/indiv_avfte_hp);
label var lnhp_earnfte "Average monthly earnings/FTEs from hp pent (ln)";

* constructing change in earnings;
* a non-missing job_ch guarantees that row _n-1 is the same person one year earlier;

generate ch_lntot_earn = lntot_gross_earn_yr - lntot_gross_earn_yr[_n-1] if !missing(job_ch);
generate ch_lnhp_earn = lnmax_gross_earn_yr - lnmax_gross_earn_yr[_n-1] if !missing(job_ch);
generate ch_lnhpmon_earn = lnmon_earn_hp - lnmon_earn_hp[_n-1] if !missing(job_ch);
generate chlnhp_earnfte = lnhp_earnfte - lnhp_earnfte[_n-1] if !missing(job_ch);

keep if !missing(job_ch);

* keeping observations in restricted sample only;

keep if Q_rest==1;


* creating business cycle measure;
* 0 = unemployment rate <4%, 1 = 4-6%, 2 = >6%, matching the value label below;
* NOTE(review): any year not listed here is left with missing bcyc and forms;
* its own group in the collapse - confirm the data contain no other years;

generate bcyc = 0 if inlist(year, 2006, 2007, 2008);
replace bcyc = 1 if inlist(year, 2002, 2003, 2004, 2005, 2009, 2015, 2016);
replace bcyc = 2 if inlist(year, 2001, 2010, 2011, 2012, 2013, 2014);

label define bcyc 0 "Unemployment rate <4%" 1 "Unemployment rate 4-6%" 2 "Unemployment rate >6%";
label val bcyc bcyc;
label var bcyc "Unemployment rate in the financial year";
notes bcyc: The rates are actually from the December year that overlaps most;


* replicating all obs to make an additional age category, total;
* the copies are recoded to age_cat 9 so each person-year is counted once in its;
* own age group and once in the total;

expand 2, generate(is_copy);
replace age_cat = 9 if is_copy==1;
drop is_copy;


* collapsing to the gender-bcyc-age_cat level, separately by job_ch;
* NOTE(review): 2001 is dropped here while the age group section of this file;
* drops 2002 instead - confirm which first year is intended;
* NOTE(review): only the cell count is suppressed further down, so percentiles,;
* means and sds are still exported for cells with fewer than 6 observations -;
* confirm this meets the output confidentiality rules;

drop if year==2001;
generate i = 1;

* build the percentile part of the collapse spec, one stat at a time, keeping;
* the output variable order p5 p10 p25 p50 p75 p90 p95;
local pct_spec "";
foreach s in p5 p10 p25 p50 p75 p90 p95 {;
	local pct_spec "`pct_spec' (`s') `s'_tot = ch_lntot_earn `s'_hp = ch_lnhp_earn `s'_hpmon = ch_lnhpmon_earn `s'_fte = chlnhp_earnfte";
};

collapse (sum) num_job = i `pct_spec'
	(mean) m_tot = ch_lntot_earn m_hp = ch_lnhp_earn m_hpmon = ch_lnhpmon_earn m_fte = chlnhp_earnfte
	(sd) sd_tot = ch_lntot_earn sd_hp = ch_lnhp_earn sd_hpmon = ch_lnhpmon_earn sd_fte = chlnhp_earnfte,  
	by(female job_ch bcyc age_cat);
label var num_job "No. of ppl in category (rr3)";
label var job_ch "Dummy for worked for a different highest paying pent to previous year";

/* use next line to conf;
grr num_job, base(3) seed(613109);
*/
* drop next line when using conf code;
* stored as double, not the default float, because this copy is written back;
* over num_job below and a float cannot hold integers above 16,777,216 exactly;
gen double rr3num_job = num_job;

* suppress any count below 6 by setting it to the extended missing value .s;

foreach var in num_job {;
	replace rr3`var' = .s if `var'<6;
};

replace num_job = rr3num_job;

drop rr3*;
describe;

* missing values, including .s, are written to the sheet as S;
export excel using "`outfile'.xlsx", sheet("3 earnchb b") sheetreplace firstrow(variables) missing("S"); 







********************************************************************************;
* (4) Tenure in job for men and women by age group;
********************************************************************************;
* Output: one row per age_cat x female with one count column per value of ten,;
* written to worksheet 4 tenure b of the workbook named by the outfile local;

local varlist "snz_uid age_cat female wkd_hpp_1ya wkd_hpp_2ya year Q_rest";
use `varlist' using GWgap_pr_IDI_v4, clear;

keep if Q_rest==1;

* tenure measure is the sum of the two lagged worked-for-hp-pent variables;
* NOTE(review): the obs0 obs1 obs2 labels further down assume ten only takes the;
* values 0, 1 and 2, i.e. that both inputs are 0/1 and never missing - confirm;

generate ten = wkd_hpp_1ya + wkd_hpp_2ya;


* replicating all obs to make an additional age category, total;
* the copies are recoded to age_cat 9 so each person-year is counted once in its;
* own age group and once in the total;

expand 2, generate(is_copy);
replace age_cat = 9 if is_copy==1;
drop is_copy;

* collapsing to the gender-age-tenure level, pooling all years;
* obs counts the rows with non-missing year in each cell;

collapse (count) obs = year, by(age_cat female ten);


label var obs "No. of ppl in category (rr3)"; 

/* use next line to conf data;
grr obs, base(3) seed(642053);
*/
* drop next line when using conf code;
* stored as double, not the default float, because this copy is written back;
* over obs below and a float cannot hold integers above 16,777,216 exactly;
gen double rr3obs = obs;

* suppress any count below 6 by setting it to the extended missing value .s;

foreach var in obs {;
	replace rr3`var' = .s if `var'<6;
};

replace obs = rr3obs;

drop rr3*;

* reshaping so person counts with different tenures are columns;

reshape wide obs, i(age_cat female) j(ten);

label var obs0 "Number of individuals in first year with employer (rr3)";
label var obs1 "Number of individuals in second year with employer (rr3)";
label var obs2 "Number of individuals in third or subsequent year with employer (rr3)";

describe;

* missing values, including .s, are written to the sheet as S;
export excel using "`outfile'.xlsx", sheet("4 tenure b") sheetreplace firstrow(variables) missing("S"); 








********************************************************************************;
* (5) Changes in firm CCK male FE when changing employer for men and women by age group;
********************************************************************************;
* Output: one row per age_cat x female with the number of job changes and the;
* distribution of the change in ffe_m, written to worksheet 5 chffe b of the;
* workbook named by the outfile local;

local varlist "snz_uid ffe_m hp_pent age_cat female year Q_rest";
use `varlist' using GWgap_pr_IDI_v4, clear;

* a person is considered to have a job change if their hp_pent differs from the previous year;
* job_ch is set only when the previous row is the same snz_uid in the immediately;
* preceding year and hp_pent is non-empty on both rows:;
*   0 = same hp_pent as the previous year, 1 = different hp_pent, missing otherwise;

sort snz_uid year;

generate job_ch = (hp_pent != hp_pent[_n-1])
	if snz_uid == snz_uid[_n-1] & year == year[_n-1] + 1
	& hp_pent != "" & hp_pent[_n-1] != "";
	

	
* constructing change in CCK firm male FE;
* defined for job changers only, where row _n-1 is the same person one year earlier;

generate ch_ffe_m = ffe_m - ffe_m[_n-1] if job_ch==1;

* keep job changers for whom ffe_m is available in both years;

keep if job_ch==1 & ch_ffe_m<.;

keep if Q_rest==1;


* replicating all obs to make an additional age category, total;
* the copies are recoded to age_cat 9 so each person-year is counted once in its;
* own age group and once in the total;

expand 2, generate(is_copy);
replace age_cat = 9 if is_copy==1;
drop is_copy;


* collapsing to the gender-age level;
* NOTE(review): only the cell count is suppressed further down, so percentiles,;
* mean and sd are still exported for cells with fewer than 6 observations -;
* confirm this meets the output confidentiality rules;

generate i = 1;

* build the percentile part of the collapse spec, keeping the output variable;
* order p5 p10 p25 p50 p75 p90 p95;
local pct_spec "";
foreach s in p5 p10 p25 p50 p75 p90 p95 {;
	local pct_spec "`pct_spec' (`s') `s'_ch = ch_ffe_m";
};

collapse (sum) num_job = i `pct_spec'
	(mean) m_ch = ch_ffe_m 
	(sd) sd_ch = ch_ffe_m, 
	by(age_cat female);
label var num_job "No. of ppl in category (rr3)";

/* use next line to conf data;
grr num_job, base(3) seed(643109);
*/
* drop next line when using conf code;
* stored as double, not the default float, because this copy is written back;
* over num_job below and a float cannot hold integers above 16,777,216 exactly;
gen double rr3num_job = num_job;

* suppress any count below 6 by setting it to the extended missing value .s;

foreach var in num_job {;
	replace rr3`var' = .s if `var'<6;
};

replace num_job = rr3num_job;

rename num_job obs;

drop rr3*;
describe;

* missing values, including .s, are written to the sheet as S;
export excel using "`outfile'.xlsx", sheet("5 chffe b") sheetreplace firstrow(variables) missing("S"); 





********************************************************************************;
* (6) Distribution of difference between male and female CCK ffe for men and women;
********************************************************************************;
* Output: one row per value of female with the cell count and the distribution;
* of ffe_m minus ffe_f, written to worksheet 6 difffe b of the workbook named by;
* the outfile local;

local varlist "snz_uid ffe_m ffe_f hp_pent female Q_rest";
use `varlist' using GWgap_pr_IDI_v4, clear;

keep if Q_rest==1;


* constructing difference in CCK fe;

generate ffe_mf = ffe_m - ffe_f;
label var ffe_mf "CCK male firm FE minus CCK female firm FE";

* the difference is missing whenever either fixed effect is missing;
drop if missing(ffe_mf);


* collapsing to the gender level;
* every remaining row in the file falls into one of the cells defined by female,;
* so these are the largest counts produced by this file;

generate i = 1;

* build the percentile part of the collapse spec, keeping the output variable;
* order p5 p10 p25 p50 p75 p90 p95;
local pct_spec "";
foreach s in p5 p10 p25 p50 p75 p90 p95 {;
	local pct_spec "`pct_spec' (`s') `s'_ch = ffe_mf";
};

collapse (sum) num_job = i `pct_spec'
	(mean) m_ch = ffe_mf 
	(sd) sd_ch = ffe_mf, 
	by(female);
label var num_job "No. of ppl in category (rr3)";

/* use next line to conf data;
grr num_job, base(3) seed(243109);
*/
* drop next line when using conf code;
* stored as double, not the default float, because this copy is written back;
* over num_job below and a float cannot hold integers above 16,777,216 exactly;
gen double rr3num_job = num_job;

* suppress any count below 6 by setting it to the extended missing value .s;

foreach var in num_job {;
	replace rr3`var' = .s if `var'<6;
};

replace num_job = rr3num_job;

rename num_job obs;

drop rr3*;
describe;

* missing values, including .s, are written to the sheet as S;
export excel using "`outfile'.xlsx", sheet("6 difffe b") sheetreplace firstrow(variables) missing("S"); 






********************************************************************************;
* (7) Distribution of difference between male and female CCK ffe for men and women by firm size;
********************************************************************************;
* Output: one row per female x L_hc_cat with the cell count and the distribution;
* of ffe_m minus ffe_f, written to worksheet 7 difffeb b of the workbook named by;
* the outfile local;

local varlist "snz_uid ffe_m ffe_f hp_pent female L_hc Q_rest";
use `varlist' using GWgap_pr_IDI_v4, clear;

keep if Q_rest==1;


* firm size categories;
* 0 = below 20, 1 = 20 to below 100, 2 = 100 to below 500, 3 = 500 and above;
* a missing L_hc fails every less-than test and so stays missing;

generate L_hc_cat = cond(L_hc<20, 0,
	cond(L_hc<100, 1,
	cond(L_hc<500, 2,
	cond(L_hc<., 3, .))));
label define L_hc_cat 0 "Firm size (0,20) (head count)" 1 "[20,100)" 2 "[100,500)" 3 "[500, inf)";
label val L_hc_cat L_hc_cat;
label var L_hc_cat "Firm size category (head count)";


* constructing difference in CCK fe;

generate ffe_mf = ffe_m - ffe_f;
label var ffe_mf "CCK male firm FE minus CCK female firm FE";

* the difference is missing whenever either fixed effect is missing;
drop if missing(ffe_mf);



* collapsing to the gender-firm size level;
* NOTE(review): only the cell count is suppressed further down, so percentiles,;
* mean and sd are still exported for cells with fewer than 6 observations -;
* confirm this meets the output confidentiality rules;

generate i = 1;

* build the percentile part of the collapse spec, keeping the output variable;
* order p5 p10 p25 p50 p75 p90 p95;
local pct_spec "";
foreach s in p5 p10 p25 p50 p75 p90 p95 {;
	local pct_spec "`pct_spec' (`s') `s'_ch = ffe_mf";
};

collapse (sum) num_job = i `pct_spec'
	(mean) m_ch = ffe_mf 
	(sd) sd_ch = ffe_mf, 
	by(female L_hc_cat);
label var num_job "No. of ppl in category (rr3)";

/* use next line to conf data;
grr num_job, base(3) seed(243109);
*/
* drop next line when using conf code;
* stored as double, not the default float, because this copy is written back;
* over num_job below and a float cannot hold integers above 16,777,216 exactly;
gen double rr3num_job = num_job;

* suppress any count below 6 by setting it to the extended missing value .s;

foreach var in num_job {;
	replace rr3`var' = .s if `var'<6;
};

replace num_job = rr3num_job;

rename num_job obs;

drop rr3*;
describe;

* missing values, including .s, are written to the sheet as S;
export excel using "`outfile'.xlsx", sheet("7 difffeb b") sheetreplace firstrow(variables) missing("S"); 






********************************************************************************;
* (8 & 9) Correlation between difference between male and female CCK ffe for men and women and % female and other fes;
********************************************************************************;
* Nothing is exported here: the two correlation matrices are printed to the log;

local varlist "snz_uid ffe_m ffe_f ffe_t wfe_m wfe_f wfe_t hp_pent year L_hc Q_rest";
use `varlist' if Q_rest==1 using GWgap_pr_IDI_v4, clear;


* merging in share female;
* hp_pent is renamed to pent only for the merge and renamed back afterwards;

rename hp_pent pent;
merge m:1 pent year using GWgap_pr_firm_v4, keep(master match) keepusing(L_hc_f L_hc_m) nogenerate;
rename pent hp_pent;

generate s_f = L_hc_f/(L_hc_f + L_hc_m);
label var s_f "Share of female head count";
drop L_hc_f L_hc_m;


* collapsing to one observation per year, hp_pent and L_hc combination;

collapse (mean) ffe_f ffe_m ffe_t wfe_f wfe_m wfe_t s_f, by(year hp_pent L_hc);
duplicates drop;


* constructing difference in CCK fe;

generate ffe_mf = ffe_m - ffe_f;
label var ffe_mf "CCK male firm FE minus CCK female firm FE";

* the difference is missing whenever either fixed effect is missing;
drop if missing(ffe_mf);


* correlation between FFE diff & gender share, weighted and unweighted;
* head count is rounded to a whole number for use as a frequency weight;

generate L_hc_rd = round(L_hc);

* paste these into Excel on worksheet 8;

local corrvars "s_f ffe_m ffe_f ffe_mf ffe_t wfe_f wfe_m wfe_t";

correlate `corrvars';
correlate `corrvars' [fweight = L_hc_rd];


